# delimit;
*** Session setup for the Figure 3 script. From here on every statement and
*** every star comment is terminated by a semicolon;
*** Close any log left open by an earlier aborted run so that the log using
*** command below cannot fail with a log-already-open error. The capture prefix
*** makes this a no-op when no log is open;
capture log close;
clear;
set more off;
log using fig3.log, replace;

*** Death counts: copy violent_ into violent_a, discard populati and the
*** original violent_, and keep age codes up to 23 excluding sex code 3.
*** The result is sorted on the merge keys and stored as one.dta;
use data00;
gen violent_a=violent_;
drop populati violent_;
drop if age>23 | sex==3;
sort year dep_ocu age sex;
save one, replace;

*** Population file: keep years through 2000, sort on the same keys and
*** store as pop.dta;
use pop_sex;
drop if year>2000;
sort year dep_ocu age sex;
save pop, replace;

*** Match-merge the death counts (master) with the population data (using) on
*** year, department, age and sex. The merge adds the _merge variable;
use one;
merge year dep_ocu age sex using pop;

*** Set missing death counts to zero wherever population is nonzero.
*** The condition has to be an if qualifier on each replace. The if programming
*** command used previously evaluates its expression for the first observation
*** only, so it either ran or skipped the replaces for all rows at once instead
*** of filtering row by row.
*** Note that a missing population also satisfies population!=0.
*** Missing values are summed as zero by the later collapse, so the collapsed
*** totals do not depend on this change;
replace death=0 if death==. & population!=0;
replace violent_a=0 if violent_a==. & population!=0;
replace disease=0 if disease==. & population!=0;
replace homicide=0 if homicide==. & population!=0;
replace accident=0 if accident==. & population!=0;

*** Convert the age code into newage. This is presumably the lower bound in
*** years of a five-year age group (code 10 gives 15, code 18 gives 55) - TODO
*** confirm the coding of age against the data documentation;
gen newage=(age-7)*5;

*** Sample restriction, following the SAS file: males aged 15-59. This is
*** implemented as sex code 1 and newage from 15 through 55. The age bounds
*** should still be double checked against the SAS file;
keep if sex==1;
keep if newage>=15 & newage<=55;

*** Fold each of the groups 20, 30, 40, 50 and 60 into the group five below it,
*** giving ten-year groups labelled 15, 25, 35, 45 and 55. The value 60 cannot
*** occur after the filter above and is listed only to mirror the original
*** recode;
replace newage=newage-5 if inlist(newage,20,30,40,50,60);


******************************************
***generate growing department definitions;

*** Each indicator below is 1 for the listed department codes and 0 otherwise,
*** including when dep_ocu is missing;

*** grow94: the base list of growing departments;
gen grow94=inlist(dep_ocu,13,18,19,50,52,86,95,97,99);

*** plante94: grow94 plus departments 20, 54 and 94;
gen plante94=(grow94==1 | inlist(dep_ocu,20,54,94));

*** plante94p: plante94 plus departments 44 and 47;
gen plante94p=(plante94==1 | inlist(dep_ocu,44,47));

*** The broadest definition overwrites grow94 and is the one used from here on;
replace grow94=plante94p;

*** DMZ: departments 50 and 18;
gen DMZ=inlist(dep_ocu,50,18);

*** Three-way province type. DMZ departments are split out of the growing
*** group and every other department is Growing or Non-growing;
gen prov_type=cond(grow94==1 & DMZ==1,"DMZ",cond(grow94==1,"Growing","Non-growing"));
drop _merge;

*** Drop department codes 5, 11 and 76. The figure notes describe the omitted
*** areas as Antioquia, Valle, and Bogota DC (the earlier comment named Bogota
*** DC, Medellin and Cali). The result is stored as temp.dta;
drop if inlist(dep_ocu,5,11,76);
save temp, replace;

**************************************************;


********************************************************
****collapsing to sum across groups and categories******;
***First pass: total the death counts and population within each year and
***province type, and store the result as temp1.dta;
use temp;
collapse (sum) death violent_a accident disease homicide population, by(year prov_type);
sort year prov_type;
save temp1, replace;

******because the variable violent has missing values for some year/department/age combinations, to be consistent with
******the way SAS handles missing values in a sum, I collapse 'violent' separately and merge it back in;
******I suspect 'violent' may still not be handled the way it is in the sas file, but it appears irrelevant to the
******subsequent analysis, so I'm leaving it as is;

***NOTE(review): no variable named exactly violent is created in this file. The
***variable violent_ was copied to violent_a and dropped earlier. Unless pop_sex
***supplies a variable called violent, Stata would presumably resolve violent as
***an abbreviation of violent_a - confirm which variable is intended;
***Second pass: total violent over its nonmissing rows only, and store the
***result as temp2.dta;
use temp;
collapse (sum) violent if violent!=., by(year prov_type);
sort year prov_type;
save temp2, replace;

***Match-merge the two collapsed files on year and province type, with temp1 as
***master. The merge adds a _merge variable that is left in the data;
use temp1;
sort year prov_type;
merge year prov_type using temp2;
********************************************************;

********************************************************;

*** Indicator for the DMZ province type (1 for DMZ rows, 0 otherwise);
gen DMZ=(prov_type=="DMZ");

*** Deaths per 100000 population for accidents, violent deaths, disease and
*** homicide;
gen arate=100000*(accident/population);
gen vrate=100000*(violent_a/population);
gen drate=100000*(disease/population);
gen hrate=100000*(homicide/population);

*** Log outcomes. Each is left missing where its argument would not be positive:
*** lnvrate and lndrate are log deaths per person, and lnvratio is the log of
*** violent deaths relative to all other deaths;
gen lnvrate=log(violent_a/population) if violent_a>0;
gen lnvratio=log(violent_a/(death-violent_a)) if violent_a>0 & violent_a<death;
gen lndrate=log(disease/population) if disease>0;

*** Period variables: post is 1 from 1995 onward, negyear is minus the year from
*** 1993 onward and 0 before, and d1990 flags the year 1990;
gen post=(year>=1995);
gen negyear=cond(year>=1993,-year,0);
gen d1990=(year==1990);


*** Descriptive labels for the outcome variables and the DMZ indicator.
*** The DMZ label previously misspelled Caqueta as Cauqeta;
label variable lnvratio "logit violent death rate";
label variable vrate "violent death rate";
label variable drate "disease death rate";
label variable DMZ "Meta or Caqueta";

*** Express lnvrate and lndrate relative to their province-type average.
*** The mean is taken over all rows of each prov_type, which is every year in
*** the data rather than a separate base period. The means are kept in
*** mean_lnvrate and mean_lndrate;
bysort prov_type: egen mean_lnvrate=mean(lnvrate);
by prov_type: egen mean_lndrate=mean(lndrate);
replace lnvrate=lnvrate-mean_lnvrate;
replace lndrate=lndrate-mean_lndrate;

**************************************************;
*****            data summary                *****;
**************************************************;
*** Summary statistics of every variable, reported separately for each
*** province type;
bysort prov_type: summarize;

**************************************************;
*****              Figure 3a                 *****;
**************************************************;
*** Plots lnvrate against year as two connected series, one for the Growing
*** rows (circle markers) and one for the Non-growing rows (msymbol(i), line
*** pattern "-"). At this point lnvrate is the log violent death rate net of its
*** province-type mean. Rows with prov_type equal to DMZ are not drawn.
*** Dotted reference lines are placed at 0, .1 and -.1. The graph is named fig3a
*** and written to disk through the saving option;

twoway 
	(scatter lnvrate year if prov_type=="Growing", msymbol(o) mcolor(black) sort c(l) clc(black) clw(medium))
	(scatter lnvrate year if prov_type=="Non-growing", msymbol(i) sort c(l) clp("-") clc(black) clw(medium)),
	
	yline(0 .1 -.1, lpattern(dot) lc(black))
	ytitle("Death rates", si(small))
	ylabel(-.3(.1).4, labs(small) nogrid)
	xlabel(1990(1)2000, labs(small))
	xtitle("Year", si(small))
	legend(row(1) label(1 "Growing") label(2 "Non-growing") subtitle("Province Type", si(small)) si(small))
	title("Fig.3a. Death Rates for Men Aged 15 - 59", si(small))
      note("1. Log rates, relative to average by province type" "2. Non-growing omits Antioquia, Valle, and Bogota DC.")
	name(fig3a, replace)
	saving(fig3a, asis replace)	
;

**************************************************;
*****              Figure 3b                 *****;
**************************************************;
*** Same layout as Figure 3a but for lndrate, which at this point is the log
*** disease death rate net of its province-type mean. The Growing and
*** Non-growing rows are drawn as two connected series and rows with prov_type
*** equal to DMZ are not drawn. The y axis labels run from -.4 to .5 here.
*** The graph is named fig3b and written to disk through the saving option;

twoway 
	(scatter lndrate year if prov_type=="Growing", msymbol(o) mcolor(black) sort c(l) clc(black) clw(medium))
	(scatter lndrate year if prov_type=="Non-growing", msymbol(i) sort c(l) clp("-") clc(black) clw(medium)),
	
	yline(0 .1 -.1, lpattern(dot) lc(black))
	ytitle("Death rates", si(small))
	ylabel(-.4(.1).5, labs(small) nogrid)
	xlabel(1990(1)2000, labs(small))
	xtitle("Year", si(small))
	legend(row(1) label(1 "Growing") label(2 "Non-growing") subtitle("Province Type", si(small)) si(small))
	title("Fig.3b. Death Rates for Men Aged 15 - 59", si(small))
      note("1. Log rates, relative to average by province type" "2. Non-growing omits Antioquia, Valle, and Bogota DC.")
	name(fig3b, replace)
	saving(fig3b, asis replace)	
;


*** Delete the intermediate datasets written by this script, in the same order
*** as before, then close the log opened at the top;
foreach scratch in temp temp1 temp2 one pop {;
	erase `scratch'.dta;
};
log close;

